body .main-container {
  max-width: 100% !important;
  width: 100% !important;
}
body {
  max-width: 100% !important;
}

body, td {
   font-size: 16px;
}
code.r{
  font-size: 14px;
}
pre {
  font-size: 14px
}

Let us set some global options for all code chunks in this document.

knitr::opts_chunk$set(
  message = FALSE,    # Disable messages printed by R code chunks
  warning = FALSE,    # Disable warnings printed by R code chunks
  echo = TRUE,        # Show R code within code chunks in output
  include = TRUE,     # Include both R code and its results in output
  eval = FALSE,       # Evaluate R code chunks
  cache = TRUE,       # Enable caching of R code chunks for faster rendering
  fig.align = "center",
  out.width = "100%",
  retina = 2
)

#set.seed(1982)
library(ggplot2)
library(gridExtra)
library(dplyr)
library(sf)
library(rmarkdown)
library(here)

We show all the scripts used to build the graph and generate the data up to this point. None of the scripts is actually run. They are just shown exactly how they were initially written. Some of the chunks are executed just to show some features of the objects.

1 Manipulate initial speed data

  1. This file is named Preprocessing/16Process_raw_data_with_ID.R and takes the original data (called Speed_Observations_SF.csv and obtained from this official website) and filters it as showed below.

  2. It produces Data_files/january_with_ID.RData.

1.0.1 Raw code

library(dplyr)
library(sf)

# bus numbers according to Wikipedia
busesnumber =  c(8501:8530, 8531:8560, 8601:8662, 8701:8750, 8751:8780, 
                 8800:8969, 6500:6554, 6560:6697, 6700:6730, 5701:5885, 7201:7293)


# read the raw data (which is not on the tracked repository)
raw = read.csv("~/Desktop/Spring 2024/Speed_Observations_SF.csv")

# get just day, hour, and speed in kph
january =  raw %>% 
  filter(Vehicle.ID %in% busesnumber) %>% # to choose only buses
  filter(Latitude > 37.7, Latitude < 37.815, Longitude > -122.52, Longitude < -122.36) %>% # to remove some weird location in front of Gabon in Africa
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326) %>%  # to transform it as an sf object
  mutate(datetime = strptime(Position.Date.Time, format="%m/%d/%Y %I:%M:%S %p")) %>%  # to format so we can manipulate later
  mutate(day = as.integer(format(datetime, format = "%d")), hour = as.integer(format(datetime, format = "%H"))) %>% # to get day and hour
  filter(Average.Speed < 73) %>% # to remove some atypical speed observations. 73 because we are allowing 10kph above the limit
  mutate(Average.Speed = Average.Speed*1.60934) %>% # to transform from mph to kph
  select(-Heading) %>% # to remove variable Position.Date.Time and Heading
  rename(speed = Average.Speed, ID = Vehicle.ID, PDT = Position.Date.Time) # to rename Average.Speed to speed so it is easier to write


save(january, file = "Data_files/january_with_ID.RData")

2 Remove consecutive zero speed observations

  1. This file is named Preprocessing/19Remove_zero_speed_for_long_periods.R and takes Data_files/january_with_ID.RData and removes consecutive zero speed observations (when the bus has stopped for a long period).

  2. It produces Data_files/data_day7142128_hour13_with_no_consecutive_zeros.RData.

2.0.1 Raw code

library(MetricGraph)
library(dplyr)
library(TSstudio)
library(ggplot2)
library(gganimate)
library(tsbox)


remove_consecutive_zeros <- function(vec) {
  # Initialize a result vector
  result <- numeric(length(vec))
  # Index for the result vector
  index <- 1
  # Flag to track if the first zero has been encountered
  first_zero <- FALSE
  # Loop through the original vector
  for (i in 1:length(vec)) {
    # If current value is not zero or previous value is not zero, or it's the first zero, add it to result
    if (vec[i] != 0 || (i > 1 && vec[i - 1] != 0) || i == 1) {
      result[index] <- vec[i]
      index <- index + 1
      # Reset first_zero flag if it's the first zero
      if (vec[i] == 0 && !first_zero) {
        first_zero <- TRUE
      }
    } else {
      # Replace consecutive zeros with NA after the first zero
      result[index] <- NA
      index <- index + 1
    }
  }
  # Trim the result vector to remove unused entries
  result <- result[1:(index - 1)]
  return(result)
}

# loading the data
load("~/Desktop/Spring 2024/january_with_ID.RData")


# just filtering
aux = january %>%
  filter(day %in% c(7,14,21,28), hour %in% c(13)) %>% # every Thursday of January 2021
  mutate(day = day/7) %>%
  dplyr::select(-PDT, -hour) 
  #distinct(geometry, .keep_all = TRUE) # to remove observations with the same location

buses_ID = unique(aux$ID)
days = 1:4

df = aux %>% 
  filter(ID == buses_ID[1], day == 1) %>% 
  arrange(datetime) %>%
  mutate(speed = remove_consecutive_zeros(speed)) %>%
  drop_na(speed)


for (i in 2:length(buses_ID)) {
  for (j in 1:length(days)) {
    check =  aux %>% filter(ID == buses_ID[i], day == days[j])
    if(nrow(check) > 0){
    tmp = check %>% 
      arrange(datetime) %>%
      mutate(speed = remove_consecutive_zeros(speed)) %>%
      drop_na(speed)
    df = rbind(tmp, df)
    }
  }
}

save(df, file = "Data_files/data_day7142128_hour13_with_no_consecutive_zeros.RData")

2.0.2 Explore the objects

load(here("Data_files/data_day7142128_hour13_with_no_consecutive_zeros.RData"))
df |> head(5) |> paged_table()
df |> dim()
## [1] 149575      5
ggplot() + geom_sf(data = df, aes(color = speed)) + theme_minimal()
Speed observations

Speed observations

3 Manipulate inital data from TomTom

  1. This file is named Preprocessing/9Creates_tomtom_dataset_from_shapefile_and_json.R and extracts data from both a .shp and a .json files to create an sf object that we use later to build the graph and get the value of some covariates.

  2. It produces Data_files/tomtom.RData.

3.0.1 Raw code

library(sf)
library(mapview)
library(jsonlite)
library(dplyr)

#tomtom_geojson = st_read("Data_files/San_Francisco_data_from_TomTom.geojson")

# read shape file
tomtom_sph = st_read("Data_files/San_Francisco_data_from_TomTom/network.shp")

# read json file and select the information of interest
tomtom_json = fromJSON("Data_files/San_Francisco_data_from_TomTom.json")$network$segmentResults$segmentTimeResults


# Initialize empty data frames for 10 and 12 columns
df_10_columns <- data.frame()
df_12_columns <- data.frame()

# Iterate through the list of data frames
for (i in seq_along(tomtom_json)) {
  # Check the number of columns in the current data frame
  num_cols <- ncol(tomtom_json[[i]])
  
  # Add list number column
  tomtom_json[[i]]$List_Number <- i
  
  # Append to the appropriate data frame based on the number of columns
  if (num_cols == 10) {
    df_10_columns <- rbind(df_10_columns, tomtom_json[[i]])
  } else if (num_cols == 12) {
    df_12_columns <- rbind(df_12_columns, tomtom_json[[i]])
  }
}

# add two columns with NA values so that we can rbin later
from_10_to_12 = df_10_columns %>% mutate(standardDeviationSpeed = NA, travelTimeStandardDeviation = NA)
  
# rbind and  order by List_Number
almost_tomtom = rbind(from_10_to_12, df_12_columns) %>% arrange(List_Number)

# join shape file and the dataset we get from the above process
casi_tomtom = bind_cols(tomtom_sph, almost_tomtom) %>% 
  mutate(FRC = as.character(FRC))

PERCENTILES = do.call(rbind, casi_tomtom$speedPercentiles) %>% as.data.frame()
names(PERCENTILES) =  paste(seq(5, 95, by = 5), "percentile", sep = "")

tomtom = bind_cols(casi_tomtom, PERCENTILES) %>% dplyr::select(-speedPercentiles)


# save the obtained data set
save(tomtom, file = "Data_files/tomtom.RData")

3.0.2 Explore the objects

load(here("Data_files/tomtom.RData"))
tomtom |> head(5) |> paged_table()
tomtom |> dim()
## [1] 67199    39
ggplot() + geom_sf(data = tomtom, aes(color = SpeedLimit)) + theme_minimal()
Speed Limits

Speed Limits

4 Build the graph and graph-process the speed data

  1. This file is named Preprocessing/28Window.case.file1.graph.builder.R and takes

    • Data_files/tomtom.RData

    • Data_files/data_day7142128_hour13_with_no_consecutive_zeros.RData.

  2. It defines polygon to cut the data and the network.

  3. Then it builds the graph and stores it as Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData.

  4. It also processes the data into graph-data and saves it as Data_files/data_day7142128_hour13_with_no_consecutive_zeros_partialtomtom_graph_processed.RData.

4.0.1 Raw code

library(sf)
library(dplyr)
library(tidyr)
library(mapview)
library(MetricGraph)

# load network from tomtom
load("Data_files/tomtom.RData")
load("Data_files/data_day7142128_hour13_with_no_consecutive_zeros.RData") # just to get the crs  

################################################################################
#              build polygon to cut the network and the data                   #
################################################################################

# to define a polygon to cut the network and the data
polygon = st_multipoint(c(st_point(c(-122.42591, 37.80732)),
                          st_point(c(-122.40648, 37.81002)),
                          st_point(c(-122.38820, 37.78959)),
                          st_point(c(-122.38770, 37.78849)),
                          st_point(c(-122.38483, 37.76713)),
                          st_point(c(-122.39342, 37.76665)),
                          st_point(c(-122.39372, 37.76655)),
                          st_point(c(-122.41970, 37.76500)),
                          st_point(c(-122.42039, 37.77159)),
                          st_point(c(-122.42050, 37.77174)),
                          st_point(c(-122.42040, 37.77194)),
                          st_point(c(-122.41905, 37.77306)))) %>%
  st_cast("POLYGON") %>%
  st_sfc(crs = st_crs(df))

from.tomtom = tomtom %>%
  dplyr::select(-Id, -Segment.Id, -NewSegId, -timeSet, -dateRange, -standardDeviationSpeed, -travelTimeStandardDeviation) %>%
  filter(FRC != "7") %>% 
  mutate(value = SpeedLimit, road_type = paste("class_", FRC, sep = ""), aux = paste("class_", FRC, sep = "")) %>%
  pivot_wider(names_from = aux, values_from = value, values_fill = list(value = 0)) %>%
  mutate(upto1 = class_0 + class_1) %>%
  mutate(upto3 = upto1 + class_3) %>%
  mutate(upto4 = upto3 + class_4) %>%
  mutate(upto5 = upto4 + class_5) %>%
  mutate(upto6 = upto5 + class_6) %>%
  mutate(Length  = Length/1000) %>%  # in km
  mutate(density = sampleSize/Length) %>% # density whole day
  mutate(density_per_hour = density/24) %>% # density per hour
  st_transform(crs = st_crs(df)) %>%
  st_filter(x = ., y = polygon, .predicate = st_within)


# get the weights and edges
weights = from.tomtom %>% st_drop_geometry()
edges = from.tomtom$geometry

# building the graph
graph = graph_components$new(edges = edges, which_longlat = "sf", longlat = TRUE, edge_weights = weights)

# getting the largest connected component
sf_graph = graph$get_largest()

################################################################################
#                                   save the graph                             #
################################################################################

save(sf_graph, file = "Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData")


################################################################################
#                                 process the data                             #
################################################################################

data.reduced = st_filter(x = df, y = polygon, .predicate = st_within)

sf_graph$add_observations(data = data.reduced, group = "day", tolerance = 0.02, duplicated_strategy = "jitter") # tolerance = 40m

# getting the data from the graph (that is, in graph coordinates)
data_on_graph = sf_graph$get_data()

################################################################################
#                                   save the data                              #
################################################################################

save(data_on_graph, file = "Data_files/data_day7142128_hour13_with_no_consecutive_zeros_partialtomtom_graph_processed.RData")

4.0.2 Explore the objects

load(here("Data_files/data_day7142128_hour13_with_no_consecutive_zeros_partialtomtom_graph_processed.RData"))
data_on_graph |> head(5) |> paged_table()
data_on_graph |> dim()
## [1] 39575    10
load("~/Desktop/Spring 2024/Sanfrancisco_data_analysis/Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData")
sf_graph$plot(vertex_size = 0, 
              edge_width = 1, 
              edge_weight = "SpeedLimit", 
              edge_width_weight = "SpeedLimit", 
              edge_color = "SpeedLimit", 
              add_new_scale_weights = FALSE)
Speed Limits

Speed Limits

5 Get traffic signs information from OSM

  1. This file is named Preprocessing/5Traffic_stop_inclussion.R and downloads data from OSM and saves it as Networks object/traffic_stop.RData.

5.0.1 Raw code

library(mapview)
library(dplyr)
library(plotly)
library(osmdata)
library(sf)

bb = c(-122.53000, 37.69702, -122.37000 ,  37.82600) #c(-122.51362, 37.69702, -122.35779 ,  37.81130)
data = opq(bbox = bb) %>%
  add_osm_feature(key="highway", value=c("traffic_signals", "bus_stop", "stop", "crossing")) %>%
  osmdata_sf()

datapoints<- data$osm_points%>%
             filter(highway %in% c("traffic_signals", "bus_stop", "stop", "crossing")) %>%
              select(geometry, highway)

save(datapoints, file = "Networks object/traffic_stop.RData")

5.0.2 Explore the objects

load(here("Networks object/traffic_stop.RData"))
datapoints |> head(5) |> paged_table()
datapoints |> dim()
## [1] 21733     2
ggplot() + geom_sf(data = datapoints, aes(color = highway)) + theme_minimal()
Traffic signs

Traffic signs

datapoints$highway |> table() |> as.data.frame() |> paged_table()

6 Process traffic signs information into graph format.

  1. This file is named Preprocessing/28Window.case.file2.stop_signsprocessing.R and takes

    • Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData

    • Networks object/traffic_stop.RData

    • Data_files/data_day7142128_hour13_with_no_consecutive_zeros.RData.

  2. It processes the traffic signs information into graph format and saves it as Data_files/stops_data_on_graph_partialtomtom.RData.

6.0.1 Raw code

library(MetricGraph)
library(dplyr)
library(tidyr)
library(sf)
library(plotly)

load("Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData")
load("Networks object/traffic_stop.RData")
load("Data_files/data_day7142128_hour13_with_no_consecutive_zeros.RData") # just to get the crs  

################################################################################
#              build polygon to cut the network and the data                   #
################################################################################

# to define a polygon to cut the network and the data
polygon = st_multipoint(c(st_point(c(-122.42591, 37.80732)),
                          st_point(c(-122.40648, 37.81002)),
                          st_point(c(-122.38820, 37.78959)),
                          st_point(c(-122.38770, 37.78849)),
                          st_point(c(-122.38483, 37.76713)),
                          st_point(c(-122.39342, 37.76665)),
                          st_point(c(-122.39372, 37.76655)),
                          st_point(c(-122.41970, 37.76500)),
                          st_point(c(-122.42039, 37.77159)),
                          st_point(c(-122.42050, 37.77174)),
                          st_point(c(-122.42040, 37.77194)),
                          st_point(c(-122.41905, 37.77306)))) %>%
  st_cast("POLYGON") %>%
  st_sfc(crs = st_crs(df))

datapoints.reduced = datapoints %>% st_transform(crs = st_crs(df)) %>% st_filter(x = ., y = polygon, .predicate = st_within)


stops_obs <- sf_graph$add_observations(data = datapoints.reduced, tolerance = 0.003, clear_obs = TRUE) # tolerance 7m

traffic_stops = sf_graph$get_data()

save(traffic_stops, file="Data_files/stops_data_on_graph_partialtomtom.RData")

6.0.2 Explore the objects

load(here("Data_files/stops_data_on_graph_partialtomtom.RData"))
traffic_stops |> head(5) |> paged_table()
traffic_stops |> dim()
## [1] 2926    7
ggplot() + geom_point(data = traffic_stops, aes(x = .coord_x, y = .coord_y, color = highway)) + theme_minimal()
Traffic signs on graph

Traffic signs on graph

traffic_stops$highway |> table() |> as.data.frame() |> paged_table()

7 Create covariates for graph data

  1. This file is named Preprocessing/28Window.case.file3.addcovariates.R and takes

    • Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData

    • Data_files/data_day7142128_hour13_with_no_consecutive_zeros_partialtomtom_graph_processed.RData

    • Data_files/stops_data_on_graph_partialtomtom.RData.

  2. It produces Data_files/data_on_graph_with_covariates_no_consecutive_zeros_partialtomtom.RData.

7.0.1 Raw code

library(MetricGraph)
library(dplyr)
# loading the data
load("Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData")
load("Data_files/data_day7142128_hour13_with_no_consecutive_zeros_partialtomtom_graph_processed.RData")
load("Data_files/stops_data_on_graph_partialtomtom.RData")

to_remove = data_on_graph %>% filter(speed == 0, .distance_to_graph > 0.001) # removing zero speed observations that are 5m or more away from the graph
data_on_graph = setdiff(data_on_graph, to_remove) %>% filter(.distance_to_graph <= 0.003)


# length of each edge
edge_length = sf_graph$get_edge_lengths()

# matrix of vertices, the vertices in the i-th are the vertices that define edge i
vertex_matrix = sf_graph$E 



sf_graph$add_observations(data = data_on_graph, group = "day", clear_obs = TRUE)
sf_graph$edgeweight_to_data(data_loc = TRUE)

data_on_graph = sf_graph$get_data() %>% drop_na(-StreetName) # this drops all rows with at least one NA value but without taking into account StreetName

# getting the edge_number and distance_on_edge columns from data
data_on_graph_red = data.frame(edge_number = data_on_graph$.edge_number, 
                               distance_on_edge = data_on_graph$.distance_on_edge)


# defining function that decays from 1
fun = function(x){return(exp(-((x/0.05)^2)))}


# ------------------------------------------------------------------------------------------------------------------
# For bus_stop
# ------------------------------------------------------------------------------------------------------------------



# getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)
sign_data = traffic_stops %>% 
  filter(highway == "bus_stop") %>% 
  as.data.frame() %>% 
  dplyr::select(.edge_number, .distance_on_edge) %>%
  rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)

# initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
edges = c()

borderline = 0.1
# looping over the signs


list_for_indices = list()
list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
list_for_max_values = list()
list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"

# ----------------------------------------------------------------------------------------------

for (k in 1:nrow(sign_data)) {
  # getting the sign at iteration i
  sign = sign_data[k,]
  # getting the edge that contains the sign
  edge_number = sign[[1]]
  # storing the edge_number into edges vector
  edges[k] = edge_number
  # getting the distance on the edge of the sign
  distance_on_edge = sign[[2]]
  
  original_distance = as.numeric(edge_length[edge_number])
  center_sign = original_distance*distance_on_edge
  vertices = vertex_matrix[edge_number,]
  
  if(center_sign <= borderline){
    # going
    going_edges = which(vertex_matrix[,1] %in% vertices[1])
    if(length(going_edges) > 0){
      for (i in 1:length(going_edges)) {
        center_going = -center_sign
        idx = which(data_on_graph_red$edge_number == going_edges[i])
        list_for_indices[[going_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
        eval = matrix(fun(aux - center_going), ncol = 1)
        list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
      }
    }
    
    # coming
    coming_edges = which(vertex_matrix[,2] %in% vertices[1])
    if(length(coming_edges) > 0){
      for (i in 1:length(coming_edges)) {
        center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
        idx = which(data_on_graph_red$edge_number == coming_edges[i])
        list_for_indices[[coming_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
        eval = matrix(fun(aux - center_coming), ncol = 1)
        list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
      }
    }
    
  }
  if(center_sign >= (original_distance- borderline)){
    # going
    going_edges = which(vertex_matrix[,1] %in% vertices[2])
    if(length(going_edges) > 0){
      for (i in 1:length(going_edges)) {
        center_going = -(original_distance - center_sign)
        idx = which(data_on_graph_red$edge_number == going_edges[i])
        list_for_indices[[going_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
        eval = matrix(fun(aux - center_going), ncol = 1)
        list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
      }
    }
    # coming
    coming_edges = which(vertex_matrix[,2] %in% vertices[2])
    if(length(coming_edges) > 0){
      for (i in 1:length(coming_edges)) {
        center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
        idx = which(data_on_graph_red$edge_number == coming_edges[i])
        list_for_indices[[coming_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
        eval = matrix(fun(aux - center_coming), ncol = 1)
        list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
      } # end for
    }# end if
  } # end else
}


#----------------------------------------------------------------------------------------------------
# we compute covariate at the points on each edge just taking into account only signs in the same edge
for (k in 1:nrow(sign_data)) {
  # getting the sign at iteration i
  sign = sign_data[k,]
  # getting the edge that contains the sign
  edge_number = sign[[1]]
  # storing the edge_number into edges vector
  edges[k] = edge_number
  # getting the distance on the edge of the sign
  distance_on_edge = sign[[2]]
  
  # indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
  idx = which(data_on_graph_red$edge_number == edge_number)
  list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
  # getting the distance of the points in the edge of interest
  aux = data_on_graph_red$distance_on_edge[idx]
  # computing the value of the covariate (with normalized and no normalized distance)
  eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
  list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
}

list_for_indices[[(sf_graph$nE + 1)]] = NULL
list_for_max_values[[(sf_graph$nE + 1)]] = NULL

# drop NULL members
filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]

new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]


# Creating number of signs covariate ----------------------------------------------------------

cov_number_sign = rep(0, nrow(data_on_graph_red))

for (i in 1:length(new_filtered_indices)) {
  if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
    cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
    new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
  }
}

# -------------------------------------------------------------------------------------------------

covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
                                   all_max_values = unlist(new_filtered_max_values))



final_cov = rep(0, nrow(data_on_graph_red))
final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values

final1 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)


# ------------------------------------------------------------------------------------------------------------------
# For traffic_signals
# ------------------------------------------------------------------------------------------------------------------


# getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)

sign_data = traffic_stops %>% 
  filter(highway == "traffic_signals") %>% 
  as.data.frame() %>% 
  dplyr::select(.edge_number, .distance_on_edge) %>%
  rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)

# initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
edges = c()

borderline = 0.05
# looping over the signs


list_for_indices = list()
list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
list_for_max_values = list()
list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"

# ----------------------------------------------------------------------------------------------

for (k in 1:nrow(sign_data)) {
  # getting the sign at iteration i
  sign = sign_data[k,]
  # getting the edge that contains the sign
  edge_number = sign[[1]]
  # storing the edge_number into edges vector
  edges[k] = edge_number
  # getting the distance on the edge of the sign
  distance_on_edge = sign[[2]]
  
  original_distance = as.numeric(edge_length[edge_number])
  center_sign = original_distance*distance_on_edge
  vertices = vertex_matrix[edge_number,]
  
  if(center_sign <= borderline){
    # going
    going_edges = which(vertex_matrix[,1] %in% vertices[1])
    if(length(going_edges) > 0){
      for (i in 1:length(going_edges)) {
        center_going = -center_sign
        idx = which(data_on_graph_red$edge_number == going_edges[i])
        list_for_indices[[going_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
        eval = matrix(fun(aux - center_going), ncol = 1)
        list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
      }
    }
    
    # coming
    coming_edges = which(vertex_matrix[,2] %in% vertices[1])
    if(length(coming_edges) > 0){
      for (i in 1:length(coming_edges)) {
        center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
        idx = which(data_on_graph_red$edge_number == coming_edges[i])
        list_for_indices[[coming_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
        eval = matrix(fun(aux - center_coming), ncol = 1)
        list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
      }
    }
    
  }
  if(center_sign >= (original_distance- borderline)){
    # going
    going_edges = which(vertex_matrix[,1] %in% vertices[2])
    if(length(going_edges) > 0){
      for (i in 1:length(going_edges)) {
        center_going = -(original_distance - center_sign)
        idx = which(data_on_graph_red$edge_number == going_edges[i])
        list_for_indices[[going_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
        eval = matrix(fun(aux - center_going), ncol = 1)
        list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
      }
    }
    # coming
    coming_edges = which(vertex_matrix[,2] %in% vertices[2])
    if(length(coming_edges) > 0){
      for (i in 1:length(coming_edges)) {
        center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
        idx = which(data_on_graph_red$edge_number == coming_edges[i])
        list_for_indices[[coming_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
        eval = matrix(fun(aux - center_coming), ncol = 1)
        list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
      } # end for
    }# end if
  } # end else
}


#----------------------------------------------------------------------------------------------------
# we compute covariate at the points on each edge just taking into account only signs in the same edge
for (k in 1:nrow(sign_data)) {
  # getting the sign at iteration i
  sign = sign_data[k,]
  # getting the edge that contains the sign
  edge_number = sign[[1]]
  # storing the edge_number into edges vector
  edges[k] = edge_number
  # getting the distance on the edge of the sign
  distance_on_edge = sign[[2]]
  
  # indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
  idx = which(data_on_graph_red$edge_number == edge_number)
  list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
  # getting the distance of the points in the edge of interest
  aux = data_on_graph_red$distance_on_edge[idx]
  # computing the value of the covariate (with normalized and no normalized distance)
  eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
  list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
}

list_for_indices[[(sf_graph$nE + 1)]] = NULL
list_for_max_values[[(sf_graph$nE + 1)]] = NULL

# drop NULL members
filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]

new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]


# Creating number of signs covariate ----------------------------------------------------------

cov_number_sign = rep(0, nrow(data_on_graph_red))

for (i in 1:length(new_filtered_indices)) {
  if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
    cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
    new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
  }
}

# -------------------------------------------------------------------------------------------------

covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
                                   all_max_values = unlist(new_filtered_max_values))



final_cov = rep(0, nrow(data_on_graph_red))
final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values

final2 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)


# ------------------------------------------------------------------------------------------------------------------
# For stop
# ------------------------------------------------------------------------------------------------------------------


# getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)

sign_data = traffic_stops %>% 
  filter(highway == "stop") %>% 
  as.data.frame() %>% 
  dplyr::select(.edge_number, .distance_on_edge) %>%
  rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)

# initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
edges = c()

borderline = 0.05
# looping over the signs


list_for_indices = list()
list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
list_for_max_values = list()
list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"

# ----------------------------------------------------------------------------------------------

for (k in 1:nrow(sign_data)) {
  # getting the sign at iteration i
  sign = sign_data[k,]
  # getting the edge that contains the sign
  edge_number = sign[[1]]
  # storing the edge_number into edges vector
  edges[k] = edge_number
  # getting the distance on the edge of the sign
  distance_on_edge = sign[[2]]
  
  original_distance = as.numeric(edge_length[edge_number])
  center_sign = original_distance*distance_on_edge
  vertices = vertex_matrix[edge_number,]
  
  if(center_sign <= borderline){
    # going
    going_edges = which(vertex_matrix[,1] %in% vertices[1])
    if(length(going_edges) > 0){
      for (i in 1:length(going_edges)) {
        center_going = -center_sign
        idx = which(data_on_graph_red$edge_number == going_edges[i])
        list_for_indices[[going_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
        eval = matrix(fun(aux - center_going), ncol = 1)
        list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
      }
    }
    
    # coming
    coming_edges = which(vertex_matrix[,2] %in% vertices[1])
    if(length(coming_edges) > 0){
      for (i in 1:length(coming_edges)) {
        center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
        idx = which(data_on_graph_red$edge_number == coming_edges[i])
        list_for_indices[[coming_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
        eval = matrix(fun(aux - center_coming), ncol = 1)
        list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
      }
    }
    
  }
  if(center_sign >= (original_distance- borderline)){
    # going
    going_edges = which(vertex_matrix[,1] %in% vertices[2])
    if(length(going_edges) > 0){
      for (i in 1:length(going_edges)) {
        center_going = -(original_distance - center_sign)
        idx = which(data_on_graph_red$edge_number == going_edges[i])
        list_for_indices[[going_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
        eval = matrix(fun(aux - center_going), ncol = 1)
        list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
      }
    }
    # coming
    coming_edges = which(vertex_matrix[,2] %in% vertices[2])
    if(length(coming_edges) > 0){
      for (i in 1:length(coming_edges)) {
        center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
        idx = which(data_on_graph_red$edge_number == coming_edges[i])
        list_for_indices[[coming_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
        eval = matrix(fun(aux - center_coming), ncol = 1)
        list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
      } # end for
    }# end if
  } # end else
}


#----------------------------------------------------------------------------------------------------
# we compute covariate at the points on each edge just taking into account only signs in the same edge
for (k in 1:nrow(sign_data)) {
  # getting the sign at iteration i
  sign = sign_data[k,]
  # getting the edge that contains the sign
  edge_number = sign[[1]]
  # storing the edge_number into edges vector
  edges[k] = edge_number
  # getting the distance on the edge of the sign
  distance_on_edge = sign[[2]]
  
  # indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
  idx = which(data_on_graph_red$edge_number == edge_number)
  list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
  # getting the distance of the points in the edge of interest
  aux = data_on_graph_red$distance_on_edge[idx]
  # computing the value of the covariate (with normalized and no normalized distance)
  eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
  list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
}

list_for_indices[[(sf_graph$nE + 1)]] = NULL
list_for_max_values[[(sf_graph$nE + 1)]] = NULL

# drop NULL members
filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]

new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]


# Creating number of signs covariate ----------------------------------------------------------

cov_number_sign = rep(0, nrow(data_on_graph_red))

for (i in 1:length(new_filtered_indices)) {
  if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
    cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
    new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
  }
}

# -------------------------------------------------------------------------------------------------

covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
                                   all_max_values = unlist(new_filtered_max_values))



final_cov = rep(0, nrow(data_on_graph_red))
final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values

final3 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)



# ------------------------------------------------------------------------------------------------------------------
# For crossing
# ------------------------------------------------------------------------------------------------------------------


# getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)

sign_data = traffic_stops %>% 
  filter(highway == "crossing") %>% 
  as.data.frame() %>% 
  dplyr::select(.edge_number, .distance_on_edge) %>%
  rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)

# initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
edges = c()

borderline = 0.05
# looping over the signs


list_for_indices = list()
list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
list_for_max_values = list()
list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"

# ----------------------------------------------------------------------------------------------

for (k in 1:nrow(sign_data)) {
  # getting the sign at iteration i
  sign = sign_data[k,]
  # getting the edge that contains the sign
  edge_number = sign[[1]]
  # storing the edge_number into edges vector
  edges[k] = edge_number
  # getting the distance on the edge of the sign
  distance_on_edge = sign[[2]]
  
  original_distance = as.numeric(edge_length[edge_number])
  center_sign = original_distance*distance_on_edge
  vertices = vertex_matrix[edge_number,]
  
  if(center_sign <= borderline){
    # going
    going_edges = which(vertex_matrix[,1] %in% vertices[1])
    if(length(going_edges) > 0){
      for (i in 1:length(going_edges)) {
        center_going = -center_sign
        idx = which(data_on_graph_red$edge_number == going_edges[i])
        list_for_indices[[going_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
        eval = matrix(fun(aux - center_going), ncol = 1)
        list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
      }
    }
    
    # coming
    coming_edges = which(vertex_matrix[,2] %in% vertices[1])
    if(length(coming_edges) > 0){
      for (i in 1:length(coming_edges)) {
        center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
        idx = which(data_on_graph_red$edge_number == coming_edges[i])
        list_for_indices[[coming_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
        eval = matrix(fun(aux - center_coming), ncol = 1)
        list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
      }
    }
    
  }
  if(center_sign >= (original_distance- borderline)){
    # going
    going_edges = which(vertex_matrix[,1] %in% vertices[2])
    if(length(going_edges) > 0){
      for (i in 1:length(going_edges)) {
        center_going = -(original_distance - center_sign)
        idx = which(data_on_graph_red$edge_number == going_edges[i])
        list_for_indices[[going_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
        eval = matrix(fun(aux - center_going), ncol = 1)
        list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
      }
    }
    # coming
    coming_edges = which(vertex_matrix[,2] %in% vertices[2])
    if(length(coming_edges) > 0){
      for (i in 1:length(coming_edges)) {
        center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
        idx = which(data_on_graph_red$edge_number == coming_edges[i])
        list_for_indices[[coming_edges[i]]] = idx
        aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
        eval = matrix(fun(aux - center_coming), ncol = 1)
        list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
      } # end for
    }# end if
  } # end else
}


#----------------------------------------------------------------------------------------------------
# we compute covariate at the points on each edge just taking into account only signs in the same edge
for (k in 1:nrow(sign_data)) {
  # getting the sign at iteration i
  sign = sign_data[k,]
  # getting the edge that contains the sign
  edge_number = sign[[1]]
  # storing the edge_number into edges vector
  edges[k] = edge_number
  # getting the distance on the edge of the sign
  distance_on_edge = sign[[2]]
  
  # indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
  idx = which(data_on_graph_red$edge_number == edge_number)
  list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
  # getting the distance of the points in the edge of interest
  aux = data_on_graph_red$distance_on_edge[idx]
  # computing the value of the covariate (with normalized and no normalized distance)
  eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
  list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
}

list_for_indices[[(sf_graph$nE + 1)]] = NULL
list_for_max_values[[(sf_graph$nE + 1)]] = NULL

# drop NULL members
filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]

new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]


# Creating number of signs covariate ----------------------------------------------------------

cov_number_sign = rep(0, nrow(data_on_graph_red))

for (i in 1:length(new_filtered_indices)) {
  if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
    cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
    new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
  }
}

# -------------------------------------------------------------------------------------------------

covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
                                   all_max_values = unlist(new_filtered_max_values))



final_cov = rep(0, nrow(data_on_graph_red))
final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values

final4 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)

cov_obs <- data_on_graph[[".distance_on_edge"]]
cov_obs <- 2 * ifelse(cov_obs > 0.5,  1 - cov_obs, cov_obs)

data_on_graph_with_covariates = data_on_graph %>% mutate(bus = final1$final_cov,
                                                         bus_number = final1$cov_number_sign,
                                                         signal = final2$final_cov,
                                                         signal_number = final2$cov_number_sign,
                                                         stop = final3$final_cov,
                                                         stop_number = final3$cov_number_sign,
                                                         crossing = final4$final_cov,
                                                         crossing_number = final4$cov_number_sign,
                                                         cov_obs = cov_obs)


save(data_on_graph_with_covariates, file = "Data_files/data_on_graph_with_covariates_no_consecutive_zeros_partialtomtom.RData")

7.0.2 Explore the objects

load(here("Data_files/data_on_graph_with_covariates_no_consecutive_zeros_partialtomtom.RData"))
data_on_graph_with_covariates |> head(5) |> paged_table()
data_on_graph_with_covariates |> dim()
## [1] 14534    64
ggplot() + geom_point(data = data_on_graph_with_covariates, aes(x = .coord_x, y = .coord_y, color = SpeedLimit)) + theme_minimal()
Speed Limits on speed observations

Speed Limits on speed observations

8 Create covariates for graph mesh

  1. This file is named Preprocessing/28Window.case.file4.addcovariatesonmesh.R and it defines a function that takes h and uses

    • Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData

    • Data_files/data_day7142128_hour13_with_no_consecutive_zeros_partialtomtom_graph_processed.RData

    • Data_files/stops_data_on_graph_partialtomtom.RData.

  2. It produces Data_files/data_on_mesh_with_covariates_partialtomtom.RData.

8.0.1 Raw code

creates_covariates_on_mesh = function(h){
  
  library(MetricGraph)
  library(dplyr)
  # loading the data
  load("Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData")
  load("Data_files/data_day7142128_hour13_with_no_consecutive_zeros_partialtomtom_graph_processed.RData")
  load("Data_files/stops_data_on_graph_partialtomtom.RData")
  
  # length of each edge
  edge_length = sf_graph$get_edge_lengths()
  
  # matrix of vertices, the vertices in the i-th are the vertices that define edge i
  vertex_matrix = sf_graph$E 
  
  sf_graph$add_observations(data = data_on_graph, group = "day", clear_obs = TRUE)
  
  sf_graph$build_mesh(h = h)
  
  data_on_mesh = sf_graph$edgeweight_to_data(mesh = TRUE, add = FALSE, return = TRUE) %>% filter(.group == 1)
  
  data_on_graph = data_on_mesh
  
  # getting the edge_number and distance_on_edge columns from data
  data_on_graph_red = data.frame(edge_number = data_on_graph$.edge_number, 
                                 distance_on_edge = data_on_graph$.distance_on_edge)
  
  
  # defining function that decays from 1
  fun = function(x){return(exp(-((x/0.05)^2)))}
  
  
  # ------------------------------------------------------------------------------------------------------------------
  # For bus_stop
  # ------------------------------------------------------------------------------------------------------------------
  
  
  
  # getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)
  sign_data = traffic_stops %>% 
    filter(highway == "bus_stop") %>% 
    as.data.frame() %>% 
    dplyr::select(.edge_number, .distance_on_edge) %>%
    rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)
  
  # initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
  edges = c()
  
  borderline = 0.1
  # looping over the signs
  
  
  list_for_indices = list()
  list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
  list_for_max_values = list()
  list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"
  
  # ----------------------------------------------------------------------------------------------
  
  for (k in 1:nrow(sign_data)) {
    # getting the sign at iteration i
    sign = sign_data[k,]
    # getting the edge that contains the sign
    edge_number = sign[[1]]
    # storing the edge_number into edges vector
    edges[k] = edge_number
    # getting the distance on the edge of the sign
    distance_on_edge = sign[[2]]
    
    original_distance = as.numeric(edge_length[edge_number])
    center_sign = original_distance*distance_on_edge
    vertices = vertex_matrix[edge_number,]
    
    if(center_sign <= borderline){
      # going
      going_edges = which(vertex_matrix[,1] %in% vertices[1])
      if(length(going_edges) > 0){
        for (i in 1:length(going_edges)) {
          center_going = -center_sign
          idx = which(data_on_graph_red$edge_number == going_edges[i])
          list_for_indices[[going_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
          eval = matrix(fun(aux - center_going), ncol = 1)
          list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
        }
      }
      
      # coming
      coming_edges = which(vertex_matrix[,2] %in% vertices[1])
      if(length(coming_edges) > 0){
        for (i in 1:length(coming_edges)) {
          center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
          idx = which(data_on_graph_red$edge_number == coming_edges[i])
          list_for_indices[[coming_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
          eval = matrix(fun(aux - center_coming), ncol = 1)
          list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
        }
      }
      
    }
    if(center_sign >= (original_distance- borderline)){
      # going
      going_edges = which(vertex_matrix[,1] %in% vertices[2])
      if(length(going_edges) > 0){
        for (i in 1:length(going_edges)) {
          center_going = -(original_distance - center_sign)
          idx = which(data_on_graph_red$edge_number == going_edges[i])
          list_for_indices[[going_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
          eval = matrix(fun(aux - center_going), ncol = 1)
          list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
        }
      }
      # coming
      coming_edges = which(vertex_matrix[,2] %in% vertices[2])
      if(length(coming_edges) > 0){
        for (i in 1:length(coming_edges)) {
          center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
          idx = which(data_on_graph_red$edge_number == coming_edges[i])
          list_for_indices[[coming_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
          eval = matrix(fun(aux - center_coming), ncol = 1)
          list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
        } # end for
      }# end if
    } # end else
  }
  
  
  #----------------------------------------------------------------------------------------------------
  # we compute covariate at the points on each edge just taking into account only signs in the same edge
  for (k in 1:nrow(sign_data)) {
    # getting the sign at iteration i
    sign = sign_data[k,]
    # getting the edge that contains the sign
    edge_number = sign[[1]]
    # storing the edge_number into edges vector
    edges[k] = edge_number
    # getting the distance on the edge of the sign
    distance_on_edge = sign[[2]]
    
    # indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
    idx = which(data_on_graph_red$edge_number == edge_number)
    list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
    # getting the distance of the points in the edge of interest
    aux = data_on_graph_red$distance_on_edge[idx]
    # computing the value of the covariate (with normalized and no normalized distance)
    eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
    list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
  }
  
  list_for_indices[[(sf_graph$nE + 1)]] = NULL
  list_for_max_values[[(sf_graph$nE + 1)]] = NULL
  
  # drop NULL members
  filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
  filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]
  
  new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
  new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]
  
  
  # Creating number of signs covariate ----------------------------------------------------------
  
  cov_number_sign = rep(0, nrow(data_on_graph_red))
  
  for (i in 1:length(new_filtered_indices)) {
    if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
      cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
      new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
    }
  }
  
  # -------------------------------------------------------------------------------------------------
  
  covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
                                     all_max_values = unlist(new_filtered_max_values))
  
  
  
  final_cov = rep(0, nrow(data_on_graph_red))
  final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values
  
  final1 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)
  
  
  # ------------------------------------------------------------------------------------------------------------------
  # For traffic_signals
  # ------------------------------------------------------------------------------------------------------------------
  
  
  # getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)
  
  sign_data = traffic_stops %>% 
    filter(highway == "traffic_signals") %>% 
    as.data.frame() %>% 
    dplyr::select(.edge_number, .distance_on_edge) %>%
    rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)
  
  # initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
  edges = c()
  
  borderline = 0.05
  # looping over the signs
  
  
  list_for_indices = list()
  list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
  list_for_max_values = list()
  list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"
  
  # ----------------------------------------------------------------------------------------------
  
  for (k in 1:nrow(sign_data)) {
    # getting the sign at iteration i
    sign = sign_data[k,]
    # getting the edge that contains the sign
    edge_number = sign[[1]]
    # storing the edge_number into edges vector
    edges[k] = edge_number
    # getting the distance on the edge of the sign
    distance_on_edge = sign[[2]]
    
    original_distance = as.numeric(edge_length[edge_number])
    center_sign = original_distance*distance_on_edge
    vertices = vertex_matrix[edge_number,]
    
    if(center_sign <= borderline){
      # going
      going_edges = which(vertex_matrix[,1] %in% vertices[1])
      if(length(going_edges) > 0){
        for (i in 1:length(going_edges)) {
          center_going = -center_sign
          idx = which(data_on_graph_red$edge_number == going_edges[i])
          list_for_indices[[going_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
          eval = matrix(fun(aux - center_going), ncol = 1)
          list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
        }
      }
      
      # coming
      coming_edges = which(vertex_matrix[,2] %in% vertices[1])
      if(length(coming_edges) > 0){
        for (i in 1:length(coming_edges)) {
          center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
          idx = which(data_on_graph_red$edge_number == coming_edges[i])
          list_for_indices[[coming_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
          eval = matrix(fun(aux - center_coming), ncol = 1)
          list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
        }
      }
      
    }
    if(center_sign >= (original_distance- borderline)){
      # going
      going_edges = which(vertex_matrix[,1] %in% vertices[2])
      if(length(going_edges) > 0){
        for (i in 1:length(going_edges)) {
          center_going = -(original_distance - center_sign)
          idx = which(data_on_graph_red$edge_number == going_edges[i])
          list_for_indices[[going_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
          eval = matrix(fun(aux - center_going), ncol = 1)
          list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
        }
      }
      # coming
      coming_edges = which(vertex_matrix[,2] %in% vertices[2])
      if(length(coming_edges) > 0){
        for (i in 1:length(coming_edges)) {
          center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
          idx = which(data_on_graph_red$edge_number == coming_edges[i])
          list_for_indices[[coming_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
          eval = matrix(fun(aux - center_coming), ncol = 1)
          list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
        } # end for
      }# end if
    } # end else
  }
  
  
  #----------------------------------------------------------------------------------------------------
  # we compute covariate at the points on each edge just taking into account only signs in the same edge
  for (k in 1:nrow(sign_data)) {
    # getting the sign at iteration i
    sign = sign_data[k,]
    # getting the edge that contains the sign
    edge_number = sign[[1]]
    # storing the edge_number into edges vector
    edges[k] = edge_number
    # getting the distance on the edge of the sign
    distance_on_edge = sign[[2]]
    
    # indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
    idx = which(data_on_graph_red$edge_number == edge_number)
    list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
    # getting the distance of the points in the edge of interest
    aux = data_on_graph_red$distance_on_edge[idx]
    # computing the value of the covariate (with normalized and no normalized distance)
    eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
    list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
  }
  
  list_for_indices[[(sf_graph$nE + 1)]] = NULL
  list_for_max_values[[(sf_graph$nE + 1)]] = NULL
  
  # drop NULL members
  filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
  filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]
  
  new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
  new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]
  
  
  # Creating number of signs covariate ----------------------------------------------------------
  
  cov_number_sign = rep(0, nrow(data_on_graph_red))
  
  for (i in 1:length(new_filtered_indices)) {
    if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
      cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
      new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
    }
  }
  
  # -------------------------------------------------------------------------------------------------
  
  covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
                                     all_max_values = unlist(new_filtered_max_values))
  
  
  
  final_cov = rep(0, nrow(data_on_graph_red))
  final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values
  
  final2 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)
  
  
  # ------------------------------------------------------------------------------------------------------------------
  # For stop
  # ------------------------------------------------------------------------------------------------------------------
  
  
  # getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)
  
  sign_data = traffic_stops %>% 
    filter(highway == "stop") %>% 
    as.data.frame() %>% 
    dplyr::select(.edge_number, .distance_on_edge) %>%
    rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)
  
  # initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
  edges = c()
  
  borderline = 0.05
  # looping over the signs
  
  
  list_for_indices = list()
  list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
  list_for_max_values = list()
  list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"
  
  # ----------------------------------------------------------------------------------------------
  
  for (k in 1:nrow(sign_data)) {
    # getting the sign at iteration i
    sign = sign_data[k,]
    # getting the edge that contains the sign
    edge_number = sign[[1]]
    # storing the edge_number into edges vector
    edges[k] = edge_number
    # getting the distance on the edge of the sign
    distance_on_edge = sign[[2]]
    
    original_distance = as.numeric(edge_length[edge_number])
    center_sign = original_distance*distance_on_edge
    vertices = vertex_matrix[edge_number,]
    
    if(center_sign <= borderline){
      # going
      going_edges = which(vertex_matrix[,1] %in% vertices[1])
      if(length(going_edges) > 0){
        for (i in 1:length(going_edges)) {
          center_going = -center_sign
          idx = which(data_on_graph_red$edge_number == going_edges[i])
          list_for_indices[[going_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
          eval = matrix(fun(aux - center_going), ncol = 1)
          list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
        }
      }
      
      # coming
      coming_edges = which(vertex_matrix[,2] %in% vertices[1])
      if(length(coming_edges) > 0){
        for (i in 1:length(coming_edges)) {
          center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
          idx = which(data_on_graph_red$edge_number == coming_edges[i])
          list_for_indices[[coming_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
          eval = matrix(fun(aux - center_coming), ncol = 1)
          list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
        }
      }
      
    }
    if(center_sign >= (original_distance- borderline)){
      # going
      going_edges = which(vertex_matrix[,1] %in% vertices[2])
      if(length(going_edges) > 0){
        for (i in 1:length(going_edges)) {
          center_going = -(original_distance - center_sign)
          idx = which(data_on_graph_red$edge_number == going_edges[i])
          list_for_indices[[going_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
          eval = matrix(fun(aux - center_going), ncol = 1)
          list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
        }
      }
      # coming
      coming_edges = which(vertex_matrix[,2] %in% vertices[2])
      if(length(coming_edges) > 0){
        for (i in 1:length(coming_edges)) {
          center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
          idx = which(data_on_graph_red$edge_number == coming_edges[i])
          list_for_indices[[coming_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
          eval = matrix(fun(aux - center_coming), ncol = 1)
          list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
        } # end for
      }# end if
    } # end else
  }
  
  
  #----------------------------------------------------------------------------------------------------
  # we compute covariate at the points on each edge just taking into account only signs in the same edge
  for (k in 1:nrow(sign_data)) {
    # getting the sign at iteration i
    sign = sign_data[k,]
    # getting the edge that contains the sign
    edge_number = sign[[1]]
    # storing the edge_number into edges vector
    edges[k] = edge_number
    # getting the distance on the edge of the sign
    distance_on_edge = sign[[2]]
    
    # indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
    idx = which(data_on_graph_red$edge_number == edge_number)
    list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
    # getting the distance of the points in the edge of interest
    aux = data_on_graph_red$distance_on_edge[idx]
    # computing the value of the covariate (with normalized and no normalized distance)
    eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
    list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
  }
  
  list_for_indices[[(sf_graph$nE + 1)]] = NULL
  list_for_max_values[[(sf_graph$nE + 1)]] = NULL
  
  # drop NULL members
  filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
  filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]
  
  new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
  new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]
  
  
  # Creating number of signs covariate ----------------------------------------------------------
  
  cov_number_sign = rep(0, nrow(data_on_graph_red))
  
  for (i in 1:length(new_filtered_indices)) {
    if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
      cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
      new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
    }
  }
  
  # -------------------------------------------------------------------------------------------------
  
  covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
                                     all_max_values = unlist(new_filtered_max_values))
  
  
  
  final_cov = rep(0, nrow(data_on_graph_red))
  final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values
  
  final3 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)
  
  
  
  # ------------------------------------------------------------------------------------------------------------------
  # For crossing
  # ------------------------------------------------------------------------------------------------------------------
  
  
  # getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)
  
  sign_data = traffic_stops %>% 
    filter(highway == "crossing") %>% 
    as.data.frame() %>% 
    dplyr::select(.edge_number, .distance_on_edge) %>%
    rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)
  
  # initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
  edges = c()
  
  borderline = 0.05
  # looping over the signs
  
  
  list_for_indices = list()
  list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
  list_for_max_values = list()
  list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"
  
  # ----------------------------------------------------------------------------------------------
  
  for (k in 1:nrow(sign_data)) {
    # getting the sign at iteration i
    sign = sign_data[k,]
    # getting the edge that contains the sign
    edge_number = sign[[1]]
    # storing the edge_number into edges vector
    edges[k] = edge_number
    # getting the distance on the edge of the sign
    distance_on_edge = sign[[2]]
    
    original_distance = as.numeric(edge_length[edge_number])
    center_sign = original_distance*distance_on_edge
    vertices = vertex_matrix[edge_number,]
    
    if(center_sign <= borderline){
      # going
      going_edges = which(vertex_matrix[,1] %in% vertices[1])
      if(length(going_edges) > 0){
        for (i in 1:length(going_edges)) {
          center_going = -center_sign
          idx = which(data_on_graph_red$edge_number == going_edges[i])
          list_for_indices[[going_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
          eval = matrix(fun(aux - center_going), ncol = 1)
          list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
        }
      }
      
      # coming
      coming_edges = which(vertex_matrix[,2] %in% vertices[1])
      if(length(coming_edges) > 0){
        for (i in 1:length(coming_edges)) {
          center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
          idx = which(data_on_graph_red$edge_number == coming_edges[i])
          list_for_indices[[coming_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
          eval = matrix(fun(aux - center_coming), ncol = 1)
          list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
        }
      }
      
    }
    if(center_sign >= (original_distance- borderline)){
      # going
      going_edges = which(vertex_matrix[,1] %in% vertices[2])
      if(length(going_edges) > 0){
        for (i in 1:length(going_edges)) {
          center_going = -(original_distance - center_sign)
          idx = which(data_on_graph_red$edge_number == going_edges[i])
          list_for_indices[[going_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
          eval = matrix(fun(aux - center_going), ncol = 1)
          list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
        }
      }
      # coming
      coming_edges = which(vertex_matrix[,2] %in% vertices[2])
      if(length(coming_edges) > 0){
        for (i in 1:length(coming_edges)) {
          center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
          idx = which(data_on_graph_red$edge_number == coming_edges[i])
          list_for_indices[[coming_edges[i]]] = idx
          aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
          eval = matrix(fun(aux - center_coming), ncol = 1)
          list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
        } # end for
      }# end if
    } # end else
  }
  
  
  #----------------------------------------------------------------------------------------------------
  # we compute covariate at the points on each edge just taking into account only signs in the same edge
  for (k in 1:nrow(sign_data)) {
    # getting the sign at iteration i
    sign = sign_data[k,]
    # getting the edge that contains the sign
    edge_number = sign[[1]]
    # storing the edge_number into edges vector
    edges[k] = edge_number
    # getting the distance on the edge of the sign
    distance_on_edge = sign[[2]]
    
    # indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
    idx = which(data_on_graph_red$edge_number == edge_number)
    list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
    # getting the distance of the points in the edge of interest
    aux = data_on_graph_red$distance_on_edge[idx]
    # computing the value of the covariate (with normalized and no normalized distance)
    eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
    list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
  }
  
  list_for_indices[[(sf_graph$nE + 1)]] = NULL
  list_for_max_values[[(sf_graph$nE + 1)]] = NULL
  
  # drop NULL members
  filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
  filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]
  
  new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
  new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]
  
  
  # Creating number of signs covariate ----------------------------------------------------------
  
  cov_number_sign = rep(0, nrow(data_on_graph_red))
  
  for (i in 1:length(new_filtered_indices)) {
    if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
      cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
      new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
    }
  }
  
  # -------------------------------------------------------------------------------------------------
  
  covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
                                     all_max_values = unlist(new_filtered_max_values))
  
  
  
  final_cov = rep(0, nrow(data_on_graph_red))
  final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values
  
  final4 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)
  
  cov_obs <- data_on_graph[[".distance_on_edge"]]
  cov_obs <- 2 * ifelse(cov_obs > 0.5,  1 - cov_obs, cov_obs)
  
  data_on_mesh_with_covariates = data_on_graph %>% mutate(bus = final1$final_cov,
                                                          bus_number = final1$cov_number_sign,
                                                          signal = final2$final_cov,
                                                          signal_number = final2$cov_number_sign,
                                                          stop = final3$final_cov,
                                                          stop_number = final3$cov_number_sign,
                                                          crossing = final4$final_cov,
                                                          crossing_number = final4$cov_number_sign,
                                                          cov_obs = cov_obs)
  
  
  save(data_on_mesh_with_covariates, file = "Data_files/data_on_mesh_with_covariates_partialtomtom.RData")
  
  
}

8.0.2 Explore the objects

load(here("Data_files/data_on_mesh_with_covariates_partialtomtom.RData"))
data_on_mesh_with_covariates |> head(5) |> paged_table()
data_on_mesh_with_covariates |> dim()
## [1] 7169   59
ggplot() + geom_point(data = data_on_mesh_with_covariates, aes(x = .coord_x, y = .coord_y, color = SpeedLimit)) + theme_minimal()
Speed Limits on mesh nodes

Speed Limits on mesh nodes